* Session setup: start from an empty session, switch off output paging and
* make double the default storage type for every variable created below.
clear all
set more off, permanently
set type double
* NOTE(review): the path is empty in this file; presumably it has to be set to
* the folder holding the .dta files before running - confirm.
cd ""

* Log file
* Close any log left open by an earlier run, make sure the target folder
* exists (log using stops with an error when it does not), then open a text
* log whose name carries the current date.
capture log close
capture mkdir "log_files"
global date "`c(current_date)'"
log using "log_files/tables_$date.txt", text replace

********************************************************************************
**************** TABLE 1: Accountability and Candidates' Education *************
********************************************************************************

* Table 1: each education outcome is regressed on treat_1 and the running
* variable voterecall_lag, using base_main.dta.
use "base_main.dta", clear
global education "yrs_edu Uni Tec Sec2"

*- Treatment variable: 1 above the .5 cutoff, 0 at or below it,
*  missing where voterecall_lag is missing
	generate treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tabulate treat_1

*- Polynomials: powers 2 to 4 of voterecall_lag, written out as products
	local product voterecall_lag
	forvalues k=2/4 {
		local product `product'*voterecall_lag
		generate running_a`k'=(`product')
	}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  one half-width per outcome; bw_below and bw_above hold .5 minus and
*  .5 plus that half-width
	local h_yrs_edu 0.137498229198215
	local h_Uni 0.1577362629911949
	local h_Tec 0.118662983718237
	local h_Sec2 0.1376469624331328

	foreach y of global education {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		generate bw_above_`y'=0.5+`h_`y''
	}

*- Table

	foreach y of varlist $education {

		* Observations whose running variable lies inside the window
		local window inrange(voterecall_lag, bw_below_`y', bw_above_`y')

		*- PANEL A: Local Linear Regression
		rd `y' treat_1 voterecall_lag, z0(0.5) mbw(100) cluster(district_year)

		*- PANEL B: Linear Polynomial Regression
		regress `y' treat_1 voterecall_lag if `window', vce(cluster district_year)

		*- PANEL C: Quadratic Polynomial Regression
		regress `y' treat_1 voterecall_lag running_a2 if `window', vce(cluster district_year)
			* n_ flags the estimation sample of the regression just run
			display "Observations and mean dep."
			generate n_`y'=e(sample)
			summarize `y' if n_`y'==1
			display "Number of clusters"
			display e(N_clust)
	}
	
/* NOTE: Number of observations, mean dep. and number of clusters need to be 
inserted from polynomial regressions */

********************************************************************************
*** TABLE 2: Accountability, Candidate Characteristics and Representation ******
********************************************************************************

// PANELS A and B: Candidate Characteristics //

* Table 2, Panels A and B: experience and characteristics outcomes regressed
* on treat_1 and the running variable voterecall_lag, using base_main.dta.
use "base_main.dta", clear

global experience "yrs_elected yrs_mayor yrs_partyoffice nationalparty"
global characteristics "work_public work_private age female"

*- Treatment variable: 1 above the .5 cutoff, 0 at or below it,
*  missing where voterecall_lag is missing
	generate treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tabulate treat_1

*- Polynomials: powers 2 to 4 of voterecall_lag, written out as products
	local product voterecall_lag
	forvalues k=2/4 {
		local product `product'*voterecall_lag
		generate running_a`k'=(`product')
	}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  one half-width per outcome; bw_below and bw_above hold .5 minus and
*  .5 plus that half-width
	local h_yrs_elected 0.0880029636998992
	local h_yrs_partyoffice 0.1073019610578387
	local h_yrs_mayor 0.1532732585154119
	local h_age 0.1106420420732029
	local h_female 0.2167657325493279
	local h_nationalparty 0.1100405863857313
	local h_work_public 0.0785272269386206
	local h_work_private 0.1135671293426013

	local outcomes yrs_elected yrs_partyoffice yrs_mayor age female nationalparty work_public work_private

	foreach y of local outcomes {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of local outcomes {
		generate bw_above_`y'=0.5+`h_`y''
	}

*- Table

	foreach y of varlist $experience $characteristics {

		*- Local Linear Regression
		rd `y' treat_1 voterecall_lag, z0(0.5) mbw(100) cluster(district_year)

		*- Linear Polynomial Regression (for observations and mean dep.)
		regress `y' treat_1 voterecall_lag if inrange(voterecall_lag, bw_below_`y', bw_above_`y'), vce(cluster district_year)
			* n_ flags the estimation sample of the regression just run
			display "Observations and mean dep."
			generate n_`y'=e(sample)
			summarize `y' if n_`y'==1
			display "Number of clusters"
			display e(N_clust)
	}
	
// PANEL C: Representation //

* Table 2, Panel C: representation outcomes, estimated on the rows of
* base_main.dta that pass the filter below.
use "base_main.dta", clear
keep if Primerapellido_n==1 & Segundoapellido_n==1 & CS07_per_ind_qa!=.

global representation "ind ind2 rep_ind_dnative_25per rep_ind_dnative_50per rep_ind_dnative_75per"

*- Treatment variable: 1 above the .5 cutoff, 0 at or below it,
*  missing where voterecall_lag is missing
	generate treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tabulate treat_1

*- Polynomials: powers 2 to 4 of voterecall_lag, written out as products
	local product voterecall_lag
	forvalues k=2/4 {
		local product `product'*voterecall_lag
		generate running_a`k'=(`product')
	}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  one half-width per outcome; bw_below and bw_above hold .5 minus and
*  .5 plus that half-width

	local h_ind 0.0692676838490089
	local h_ind2 0.0859836373772625
	local h_rep_ind_dnative_25per 0.0860987825538649
	local h_rep_ind_dnative_50per 0.0635948912078365
	local h_rep_ind_dnative_75per 0.0853324244210788

	local share_outcomes ind ind2
	local native_outcomes rep_ind_dnative_25per rep_ind_dnative_50per rep_ind_dnative_75per

	foreach y of local share_outcomes {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of local share_outcomes {
		generate bw_above_`y'=0.5+`h_`y''
	}
	foreach y of local native_outcomes {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of local native_outcomes {
		generate bw_above_`y'=0.5+`h_`y''
	}

*- Table

	foreach y of varlist $representation {

		*- Local Linear Regression
		rd `y' treat_1 voterecall_lag, z0(0.5) mbw(100) cluster(district_year)

		*- Linear Polynomial Regression (for observations and mean dep.)
		regress `y' treat_1 voterecall_lag if inrange(voterecall_lag, bw_below_`y', bw_above_`y'), vce(cluster district_year)
			* n_ flags the estimation sample of the regression just run
			display "Observations and mean dep."
			generate n_`y'=e(sample)
			summarize `y' if n_`y'==1
			display "Number of clusters"
			display e(N_clust)
	}
		
		
/* NOTE: Number of observations, mean dep. and number of clusters need to be 
inserted from polynomial regressions */

********************************************************************************
*************************** TABLE 3: Mechanisms ********************************
********************************************************************************

// PANELS A and B: Political Opponents and Performance prior Recall //

* Table 3, Panels A and B: education outcomes regressed on treat_1, an
* interaction of treat_1 with a moderator, the moderator itself, the running
* variable and l_population, using base_main.dta.
use "base_main.dta", clear
global education "yrs_edu Uni Tec Sec2"

	*- Demeaned variables: the mean over all loaded rows goes into the m_
	*  variable, the deviation from that mean into the dem_ variable
	foreach raw of varlist RealisedExpenses log_pim_ultimos3 log_ejec_ultimos3 {
		egen m_`raw'=mean(`raw')
		generate dem_`raw'=`raw'-m_`raw'
	}

*- Treatment variable: 1 above the .5 cutoff, 0 at or below it,
*  missing where voterecall_lag is missing
	generate treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tabulate treat_1

*- Polynomials: powers 2 to 4 of voterecall_lag, written out as products
	local product voterecall_lag
	forvalues k=2/4 {
		local product `product'*voterecall_lag
		generate running_a`k'=(`product')
	}

*- Interaction terms: treat_1 times each moderator
foreach moderator of varlist previouselection dem_RealisedExpenses dem_log_pim_ultimos3 dem_log_ejec_ultimos3 {
	generate treated_`moderator'=treat_1*`moderator'
}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  one half-width per outcome; bw_below and bw_above hold .5 minus and
*  .5 plus that half-width
	local h_yrs_edu 0.137498229198215
	local h_Uni 0.1577362629911949
	local h_Tec 0.118662983718237
	local h_Sec2 0.1376469624331328

	foreach y of global education {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		generate bw_above_`y'=0.5+`h_`y''
	}

*- Table

	foreach y of varlist $education {

		* Observations whose running variable lies inside the window
		local window inrange(voterecall_lag, bw_below_`y', bw_above_`y')

		*- PANEL A: Political Opponents Previous Election
		regress `y' treat_1 treated_previouselection previouselection voterecall_lag l_population if `window', vce(cluster district_year)
			display "Observations and mean dep."
			generate n_`y'=e(sample)
			summarize `y' if n_`y'==1
			display "Number of clusters"
			display e(N_clust)

		*- PANEL B: Performance prior Recall
		regress `y' treat_1 treated_dem_RealisedExpenses dem_RealisedExpenses voterecall_lag l_population if `window', vce(cluster district_year)
			* the sample flag is overwritten with the Panel B estimation sample
			display "Observations and mean dep."
			replace n_`y'=e(sample)
			summarize `y' if n_`y'==1
			display "Number of clusters"
			display e(N_clust)
	}
	

// PANEL C: Recalled Neighbours //

* Table 3, Panel C: education outcomes regressed on a treatment indicator and
* a running variable that are both built from the vecino1_rs to vecino10_rs
* variables of base_neighbours.dta.
use "base_neighbours.dta", clear
global education "yrs_edu Uni Tec Sec2"	

	*- Gen neighbour's recall share in t-1 
	*  Step 1: for every variable in the range vecino1_rs to vecino10_rs
	*  (dataset order), store its absolute distance from the .5 cutoff
	foreach var of varlist vecino1_rs-vecino10_rs {
		gen `var'_dif=abs(`var'-.5)
	}

	*  Step 2: smallest of those distances within each observation
	egen voterecall_lag_vecino_id=rowmin(vecino1_rs_dif-vecino10_rs_dif) // min |voterecall_lag_vecino_i - 0.5| if more than one neighbour with recall

	*  Step 3: copy the value whose distance equals the row minimum. The
	*  replace runs once per variable in loop order, so when several variables
	*  tie on the distance the last one in the range is the one kept.
	*  NOTE(review): the match is an exact equality test between stored values;
	*  presumably safe because both sides hold the same computed distance - confirm.
	gen voterecall_lag_vecino=.
		foreach var of varlist vecino1_rs-vecino10_rs {
		replace voterecall_lag_vecino=`var' if  voterecall_lag_vecino_id==`var'_dif 
	}

	*- Gen treatment var
	*  treat_vecino is set only where the observation's own voterecall_lag
	*  equals the system missing value: 1 if the selected value is above .5,
	*  0 if it is at or below .5. The selected value is then blanked out
	*  wherever voterecall_lag differs from the system missing value.
	gen treat_vecino=1 if voterecall_lag_vecino>.5 & voterecall_lag_vecino!=. & voterecall_lag==.
	replace treat_vecino=0 if voterecall_lag_vecino<=.5 & voterecall_lag_vecino!=. & voterecall_lag==. 
	replace voterecall_lag_vecino=. if voterecall_lag!=. 

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  bw_below and bw_above are constants holding .5 minus and .5 plus the
*  half-width used for each outcome

	gen bw_below_yrs_edu=0.5-0.0502318703919378 
	gen bw_below_Uni=0.5-0.0485165897330795 
	gen bw_below_Tec=0.5-0.1027995031937449 
	gen bw_below_Sec2=0.5-0.0452631239911648 

	gen bw_above_yrs_edu=0.5+0.0502318703919378 
	gen bw_above_Uni=0.5+0.0485165897330795 
	gen bw_above_Tec=0.5+0.1027995031937449 
	gen bw_above_Sec2=0.5+0.0452631239911648 

*- Table
*  One pass per outcome listed in global education

	foreach var of varlist $education {
	
		*- Local Linear Regression
		*  NOTE(review): rd is not an official Stata command; presumably the
		*  community-contributed rd package has to be installed - confirm
		rd `var' treat_vecino voterecall_lag_vecino, z0(0.5) mbw(100) cluster(district_year) 	

		*- Linear Polynomial Regression (for observations and mean dep.)
		*  Restricted to observations whose running variable lies between
		*  bw_below and bw_above for this outcome; n_ flags the estimation sample
		reg `var' treat_vecino voterecall_lag_vecino if bw_below_`var'<=voterecall_lag_vecino & bw_above_`var'>=voterecall_lag_vecino, cluster(district_year) 
			di "Observations and mean dep."
			gen n_`var'=e(sample)
			sum `var' if n_`var'==1		
			di "Number of clusters"
			di e(N_clust) 			
	}
	
/* NOTE: Number of observations, mean dep. and number of clusters need to be 
inserted from polynomial regressions */

// PANEL D: Opportunity Costs //

* Table 3, Panel D: a constructed wage variable regressed on treat_1 and the
* running variable voterecall_lag, using base_main.dta.
use "base_main.dta", clear 

*- Gen wage
*  urban and rural are set only where population is nonmissing. Stata ranks
*  missing above every number, so an unguarded (population>=4) would code
*  observations with missing population as urban and carry that value into
*  wage. With the guard those observations get a missing wage instead and
*  drop out of the regressions below.
*  NOTE(review): the meaning of the threshold 4 is not visible here - confirm
*  the unit of population.
	gen urban=(population>=4) if !missing(population)
	gen rural=(population<4) if !missing(population)
	gen age_squared=age^2
*  wage is the sum of wage_constant and of each wage_ variable found in the
*  data multiplied by the matching characteristic
	gen wage=wage_constant+wage_age*age+wage_age_squared*age_squared+wage_Uni*Uni+wage_Tec*Tec+wage_Sec2*Sec2+wage_male*male+wage_urban*urban

*- Treatment variable:
*  1 above the .5 cutoff, 0 at or below it, missing where voterecall_lag is missing
	gen treat_1=1 if voterecall_lag>.5 & voterecall_lag!=. 
	replace treat_1=0 if voterecall_lag<=.5 & voterecall_lag!=. 
	tab treat_1
	
*- Polynomials:	
*  powers 2 to 4 of the running variable
	gen running_a2=(voterecall_lag*voterecall_lag)
	gen running_a3=(voterecall_lag*voterecall_lag*voterecall_lag)
	gen running_a4=(voterecall_lag*voterecall_lag*voterecall_lag*voterecall_lag)

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  constants holding .5 minus and .5 plus the half-width used for wage

	gen bw_below_wage=0.5-0.1517842379158963 
	gen bw_above_wage=0.5+0.1517842379158963

*- Table
				
	foreach var of varlist wage  {

		*- Linear Polynomial
		*  Restricted to observations inside the window; n_ flags the
		*  estimation sample used for the observation count and the mean
		reg `var' treat_1 voterecall_lag  if voterecall_lag>=bw_below_`var' & voterecall_lag<=bw_above_`var' , cluster(district_year) 	
			di "Observations and mean dep."
			gen n_`var'=e(sample)
			sum `var' if n_`var'==1		
			di "Number of clusters"
			di e(N_clust) 			
		
		*- Local Linear Regression
		rd `var' treat_1 voterecall_lag, z0(0.5) mbw(100) cluster(district_year) 
	}	

/* NOTE: Number of observations, mean dep. and number of clusters need to be 
inserted from polynomial regressions */


********************************************************************************
*************** TABLE 4: Accountability and Political Outcomes *****************
********************************************************************************

* Table 4: political outcomes regressed on treat_1 and the running variable
* voterecall_lag, after reducing base_main.dta to one row per district_id
* and year.
use "base_main.dta", clear

global political "turnout candidates_NR win_margin ENP"

*- Effective Number of Candidates: reciprocal of sqr_share_valid
generate ENP=1/sqr_share_valid
label variable ENP "Eff. Number Candidates"

*- Collapse sample (district-year level): keep a single row for every
*  combination of district_id and year
sort district_id year
duplicates drop district_id year, force

*- Treatment variable: 1 above the .5 cutoff, 0 at or below it,
*  missing where voterecall_lag is missing
	generate treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tabulate treat_1

*- Polynomials: powers 2 to 4 of voterecall_lag, written out as products
	local product voterecall_lag
	forvalues k=2/4 {
		local product `product'*voterecall_lag
		generate running_a`k'=(`product')
	}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  one half-width per outcome; bw_below and bw_above hold .5 minus and
*  .5 plus that half-width
	local h_turnout 0.1152197092579726
	local h_candidates_NR 0.1802263716636345
	local h_win_margin 0.1021554346806231
	local h_ENP 0.2193263900868382

	foreach y of global political {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global political {
		generate bw_above_`y'=0.5+`h_`y''
	}

*- Table

	foreach y of varlist $political {

		*- Local Linear Regression
		rd `y' treat_1 voterecall_lag, z0(0.5) mbw(100)

		*- Linear Polynomial Regression (for observations and mean dep.)
		regress `y' treat_1 voterecall_lag if inrange(voterecall_lag, bw_below_`y', bw_above_`y'), robust
			* n_ flags the estimation sample of the regression just run
			display "Observations and mean dep."
			generate n_`y'=e(sample)
			summarize `y' if n_`y'==1
	}

/* NOTE: Number of observations, mean dep. and number of clusters need to be 
inserted from polynomial regressions */


********************************************************************************
**** TABLE 5: Accountability, Winners' Characteristics and Policy Outcomes *****
********************************************************************************

// PANEL A: Winners' Characteristics//

* Table 5: education outcomes (Panel A) and policy outcomes (Panel B)
* regressed on treat_1 and the running variable voterecall_lag, on the rows
* of base_main.dta flagged as election winners.
use "base_main.dta", clear

global education "yrs_edu Uni Tec Sec2"
global policy "log_ejec_ultimos3 log_pim_ultimos3"

*- Sample of winners: ELEGIDO equal to SI in 2006, 2010 or 2014,
*  or winner equal to 1 in 2002
	generate election_winner=(ELEGIDO=="SI" & inlist(year,2006,2010,2014)) | (winner==1 & year==2002)
	keep if election_winner==1

*- Treatment variable: 1 above the .5 cutoff, 0 at or below it,
*  missing where voterecall_lag is missing
	generate treat_1=(voterecall_lag>.5) if voterecall_lag!=.
	tabulate treat_1

*- Polynomials: powers 2 to 4 of voterecall_lag, written out as products
	local product voterecall_lag
	forvalues k=2/4 {
		local product `product'*voterecall_lag
		generate running_a`k'=(`product')
	}

*- Defining optimal bandwidths (Imbens-Kalyanaraman 2012):
*  one half-width per outcome; bw_below and bw_above hold .5 minus and
*  .5 plus that half-width
	local h_yrs_edu 0.2083052035863278
	local h_Uni 0.1586690632691666
	local h_Tec 0.159393638690608
	local h_Sec2 0.1218180806213951
	local h_log_ejec_ultimos3 .1186134957561288
	local h_log_pim_ultimos3 .0972063042975581

	foreach y of global education {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global education {
		generate bw_above_`y'=0.5+`h_`y''
	}
	foreach y of global policy {
		generate bw_below_`y'=0.5-`h_`y''
	}
	foreach y of global policy {
		generate bw_above_`y'=0.5+`h_`y''
	}

*- Table

	// PANEL A: Winners' Characteristics, followed by PANEL B: Policy outcomes
	// Both panels run the same two estimations, so one loop covers them in order
	foreach y of varlist $education $policy {

		*- Local Linear Regression
		rd `y' treat_1 voterecall_lag, z0(0.5) mbw(100)

		*- Linear Polynomial Regression (for observations and mean dep.)
		regress `y' treat_1 voterecall_lag if inrange(voterecall_lag, bw_below_`y', bw_above_`y'), robust
			* n_ flags the estimation sample of the regression just run
			display "Observations and mean dep."
			generate n_`y'=e(sample)
			summarize `y' if n_`y'==1
	}
	
	
/* NOTE: Number of observations, mean dep. and number of clusters need to be 
inserted from polynomial regressions */

log close
